/*
vim: syntax=c
*/

#if PACC_ASSERT
#undef NDEBUG
#else
#define NDEBUG 1
#endif
#define PACC_XGLUE(a, b) a ## _ ## b
#define PACC_GLUE(a, b) PACC_XGLUE(a, b)
#define PACC_SYM(s) PACC_GLUE(PACC_NAME, s)
#define PACC_TRACE if (PACC_CAN_TRACE && PACC_SYM(trace))

#include <assert.h>
#include <ctype.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int PACC_SYM(trace) = 0;

static void pacc_panic(const char *e) {
    fprintf(stderr, "pacc: panic: %s\n", e);
    exit(1);
}

static void pacc_nomem(void) { pacc_panic("out of memory"); }

enum pacc_status {
    no_parse, parsed, evaluated, uncomputed
};

typedef size_t *ref_t;

struct _pacc_coord {
    unsigned int line;
    unsigned int col;
};

struct _pacc_coord_memo {
    size_t n;
    struct _pacc_coord c;
};

#EMIT_DECLARATIONS

/* an intermediate result, or "mid" */
struct pacc_mid {
    /* Where in the matrix are we? Note that the bottom two bits of rule
     * also encode a status. */
    long rule; size_t col;
    size_t remainder; /* unparsed string */
    long expr_id; /* id of the expression to evaluate, yielding... */
    union PACC_SYM(vals) value; /* ... the semantic value */
    struct {
        long call_id; /* a call */
        size_t col; /* a column */
    } *evlis;
    size_t ev_alloc;
    size_t ev_valid;
};

#define _pacc_MIDS_PER_BLK (1024 / sizeof(struct pacc_mid) - 1)

/* a block of mids */
struct pacc_blk {
    struct pacc_blk *next;
    struct pacc_mid mid[_pacc_MIDS_PER_BLK];
};

/* a hash bucket */
struct pacc_bkt {
    struct pacc_blk *blk;
    unsigned int valid;
};

static struct pacc_mid *cur;

static void _pacc_save_core() __attribute__((unused));
static void _pacc_save_core(long c, size_t col) {
    PACC_TRACE fprintf(stderr, "_pacc_save_core(%ld, %zu)\n", c, col);
    if (cur->ev_valid == cur->ev_alloc) {
        cur->ev_alloc = cur->ev_alloc * 2 + 1;
        cur->evlis = realloc(cur->evlis, cur->ev_alloc * sizeof(*cur->evlis));
        if (!cur->evlis) pacc_nomem();
    }
    cur->evlis[cur->ev_valid].call_id = c;
    cur->evlis[cur->ev_valid].col = col;
    ++cur->ev_valid;
}

/* a pacc parser */
struct pacc_parser {
    unsigned char *string;
    size_t input_length;
    PACC_TYPE value;
    struct pacc_bkt *m_bkt;
    unsigned int m_bkt_cnt;
    //unsigned char *m_valid;
    //unsigned char m_chain_max;
    
    unsigned char *stack; /* the stack */
    unsigned char *sp; /* next slot in stack */
    unsigned char *stack_alloc; /* last slot in stack */

    /* Dynamic array of error strings */
    char **err;
    size_t err_alloc;
    size_t err_valid;
    /* The highest column to have associated error */
    size_t err_col;

    /* Dynamic array of co-ordinates */
    struct _pacc_coord_memo *coord;
    size_t coord_alloc;
    size_t coord_valid;
};

static void _pacc_push(void *x, size_t s, struct pacc_parser *p) {
    if (p->sp + s >= p->stack_alloc) {
        size_t l = 2 * (p->stack_alloc - p->stack) + s;
        unsigned char *n = realloc(p->stack, l);
        if (!n) pacc_nomem();
        p->sp = n + (p->sp - p->stack);
        p->stack = n;
        p->stack_alloc = n + l + 1;
    }
    assert(p->sp >= p->stack && p->sp + s < p->stack_alloc);
    memcpy(p->sp, x, s);
    p->sp += s;
}

#define _pacc_Push(v) _pacc_push(&(v), sizeof (v), _pacc)

static void *_pacc_pop(size_t s, struct pacc_parser *p) {
    assert(p->sp - s >= p->stack);
    p->sp -= s;
    return p->sp;
}

#define _pacc_Pop(v) memcpy(&(v), _pacc_pop(sizeof (v), _pacc), sizeof (v))
#define _pacc_Discard(v) ((void)_pacc_pop(sizeof (v), _pacc))

#define ref() (&cur->col)
#define ref_0(a) (_pacc->string[*a])
#define ref_dup(a) (_pacc_ref_dup((a), _pacc))
#define ref_len(a) ((a)[1] - (a)[0])
#define ref_ptr(a) (_pacc->string + *(a))
#define ref_str() (_pacc_ref_dup(ref(), _pacc))
#define ref_streq(a, b) (_pacc_ref_streq((a), (b), _pacc))

static char *_pacc_ref_dup() __attribute__((unused));
static char *_pacc_ref_dup(ref_t a, struct pacc_parser *p) {
    char *restrict r;
    const char *restrict s;
    size_t l;

    l = a[1] - a[0];
    r = realloc(0, l + 1); if (!r) pacc_nomem();
    s = (char *)p->string + a[0];
    strncpy(r, s, l);
    r[l] = '\0';
    return r;
}


// Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.

#define PACC_UTF8_ACCEPT 0
#define PACC_UTF8_REJECT 12

static const uint8_t utf8d[] = {
  // The first part of the table maps bytes to character classes that
  // to reduce the size of the transition table and create bitmasks.
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
   8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,

  // The second part is a transition table that maps a combination
  // of a state of the automaton and a character class to a state.
   0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
  12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
  12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
  12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
  12,36,12,12,12,12,12,12,12,12,12,12, 
};

inline static uint32_t
pacc_decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
  uint32_t type = utf8d[byte];

  *codep = (*state != PACC_UTF8_ACCEPT) ?
    (byte & 0x3fu) | (*codep << 6) :
    (0xff >> type) & (byte);

  *state = utf8d[256 + *state + type];
  return *state;
}

/*

License

Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

*/

/* _pacc_utf8_char attempts to read a single UTF8 encoded character from
 * the string p with length m. Its return value is the number of bytes
 * in the character, 0 in case of error; *c holds the character read. */
static int _pacc_utf8_char(unsigned char *p, int m, uint32_t *c) {
    uint32_t state = PACC_UTF8_ACCEPT;
    int i = 0;
    do
        if (state == PACC_UTF8_REJECT || i == m) return 0;
    while (pacc_decode(&state, c, p[i++]));
    return i;
}

static int _pacc_ref_streq() __attribute__((unused));
static int _pacc_ref_streq(ref_t a, char *b, struct pacc_parser *p) {
    /* XXX this could be made quicker */
    if (strlen(b) != (size_t)(a[1] - a[0])) return 0;
    return strncmp((const char *)p->string + a[0], b, a[1] - a[0]) == 0; 
}

/* Find the largest available prime which is smaller than the target
 * bucket count. This exponential selection of primes was generated by
 * Dan Bernstein's primegen-0.97 package, using the following:

perl -le '$x=1;while(($e=int(exp($x/1)))<2**32){print "./primes ",$e," ",100+$e," | sed 1q";++$x}' | sh

 * except that I replaced 2 with 3.
 */
static void _pacc_set_bkt_cnt(struct pacc_parser *p) {
    static unsigned int primes[] = {
        3, 7, 23, 59, 149, 409, 1097, 2999, 8111, 22027, 59879, 162779, 442439,
        1202609, 3269029, 8886113, 24154957, 65659969, 178482319, 485165237,
        1318815761, 3584912873U
    };
    int i, p_sz = sizeof(primes) / sizeof(*primes);
    unsigned long max = n_rules * (p->input_length + 1) / 100;
    //fprintf(stderr, "rules = %ld, length = %ld, max == %ld\n", n_rules, p->input_length, max);

    for (i = 1; i < p_sz; ++i)
        if (primes[i] > max) break;
    p->m_bkt_cnt = primes[i - 1];
}

struct pacc_parser *PACC_SYM(new)(void) {
    struct pacc_parser *p;

    p = realloc(0, sizeof *p);
    if (!p) pacc_nomem();
    //p->m_chain_max = 0;
    p->sp = 0;
    p->stack = p->stack_alloc = 0;
    p->err = 0;
    p->err_alloc = p->err_valid = 0;
    p->err_col = 0;
    p->coord = 0;
    p->coord_alloc = p->coord_valid = 0;
    return p;
}

void PACC_SYM(input)(struct pacc_parser *p, char *s, size_t l) {
    unsigned int i;

    p->string = (unsigned char *)s;
    p->input_length = l;
    _pacc_set_bkt_cnt(p);
    p->m_bkt = realloc(0, sizeof(struct pacc_bkt) * p->m_bkt_cnt);
    if (!p->m_bkt) pacc_nomem();
    //p->m_valid = realloc(0, 2 * p->m_bkt_cnt);
    for (i = 0; i < p->m_bkt_cnt; ++i) {
        p->m_bkt[i].blk = 0;
        p->m_bkt[i].valid = 0;
        //p->m_valid[i * 2] = 0; /* valid */
        //p->m_valid[i * 2 + 1] = 0; /* allocated */
    }
}

static void _pacc_destroy_blk_chain(struct pacc_blk *blk) {
    if (blk) {
        _pacc_destroy_blk_chain(blk->next);
        free(blk);
    }
}

static void _pacc_destroy_bucket(struct pacc_bkt *bkt) {
    unsigned int i;
    struct pacc_blk *blk = bkt->blk;

    for (i = 0; i < bkt->valid; ++i) {
        struct pacc_mid *m;

        if (i > 0 && i % _pacc_MIDS_PER_BLK == 0)
            blk = blk->next;
        m = blk->mid + i % _pacc_MIDS_PER_BLK;
        free(m->evlis);
    }
    _pacc_destroy_blk_chain(bkt->blk);
}

void PACC_SYM(destroy)(struct pacc_parser *p) {
    unsigned int i;

    for (i = 0; i < p->m_bkt_cnt; ++i)
        _pacc_destroy_bucket(p->m_bkt + i);
    free(p->m_bkt);
    free(p->err);
    free(p->coord);
    free(p->stack);
    free(p);
}

/* hash table */
static struct pacc_mid *_pacc_result(struct pacc_parser *p,
        size_t col, long rule) {
    unsigned int i;
    unsigned int h;
    struct pacc_blk *blk;
    struct pacc_bkt *bkt;
    struct pacc_mid *r;

    assert(col < p->input_length + 1);
    PACC_TRACE fprintf(stderr, "_pacc_result(%zu, %ld)\n", col, rule);
    h = (col + (rule << 6) + (rule << 16) - rule) % p->m_bkt_cnt;
    bkt = p->m_bkt + h;
    blk = bkt->blk;
    r = blk->mid;
    for (i = 0; i < bkt->valid; ++i, ++r) {
        if (i > 0 && i % _pacc_MIDS_PER_BLK == 0) {
            blk = blk->next;
            r = blk->mid;
        }
        if (r->col == col && r->rule >> 4 == rule)
            return r;
    }
    if (bkt->valid++ % _pacc_MIDS_PER_BLK == 0) {
        struct pacc_blk *b = realloc(0, sizeof *b);
        if (!b) pacc_nomem();
        if (blk) blk->next = b;
        else bkt->blk = b;
        blk = b;
        blk->next = 0;
        r = blk->mid;
    }
    /* Initialize the new element. */
    r->col = col; r->rule = rule << 4 | uncomputed;
    r->evlis = 0;
    r->ev_alloc = r->ev_valid = 0;
    /* Correct use of a side effect in an (unfailing) assert. */
    assert((r->expr_id = 0) == 0);
    return r;
}

static int _pacc_error(struct pacc_parser *_pacc, char *what, size_t col) {
    int doit, append;

    PACC_TRACE fprintf(stderr, "_pacc_error(%s, %zu)\n", what, col);
    append = doit = 1;
    if (col > _pacc->err_col) append = 0;
    else if (col == _pacc->err_col) {
        size_t i;
        for (i = 0; i < _pacc->err_valid; ++i) {
            if (strcmp(_pacc->err[i], what) == 0) doit = 0;
        }
    } else doit = 0;
    if (doit) {
        if (append) ++_pacc->err_valid;
        else _pacc->err_valid = 1;
        if (_pacc->err_valid > _pacc->err_alloc) {
            _pacc->err_alloc = 2 * _pacc->err_alloc + 1;
            _pacc->err = realloc(_pacc->err, _pacc->err_alloc * sizeof(char *));
            if (!_pacc->err) pacc_nomem();
        }
        _pacc->err[_pacc->err_valid - 1] = what;
        _pacc->err_col = col;
    }
    return 0; /* for the fail() macro */
}

/* Given a parser p, and a column col, return the "coordinates" in a
 * struct _pacc_coord. Both line and col are 1-based. */

static struct _pacc_coord _pacc_coords(struct pacc_parser *p, size_t col) {
    unsigned int i;
    int x, y, hi, lo;
    struct _pacc_coord_memo *c, *cs = p->coord;

    /* binary search */
    lo = 0; hi = p->coord_valid;
    while (lo < hi) {
        int mid = (lo + hi) / 2;
        c = cs + mid;
        if (c->n < col) lo = mid + 1;
        else if (c->n > col) hi = mid;
        else return c->c;
    }

    /* increase list if necessary */
    if (p->coord_valid + 1 > p->coord_alloc) {
        struct _pacc_coord_memo *n;
        p->coord_alloc = p->coord_alloc * 2 + 1;
        n = realloc(p->coord, p->coord_alloc * sizeof *n);
        if (!n) pacc_nomem();
        cs = p->coord = n;
    }

    /* move hole to the right place */
    memmove(cs + lo + 1, cs + lo, (p->coord_valid - lo) * sizeof *cs);
    ++p->coord_valid;

    /* find the coordinates */
    c = cs + lo; c->n = col;
    if (lo > 0) {
        /* start from last position */
        struct _pacc_coord_memo *l = cs + lo - 1;
        y = l->c.line; x = l->c.col; i = l->n;
    } else {
        /* 1-based counting for rows and columns */
        y = 1; x = 1; i = 0;
    }
    while (i < col) {
        uint32_t c;
        int l = _pacc_utf8_char(p->string + i, p->input_length - i, &c);
        if (!l) pacc_panic("invalid UTF-8 input");
        i += l;
        ++x;
        if (c == '\n') { ++y; x = 1; }
    }

    /* memoize them */
    c->c.line = y;
    c->c.col = x;
    return c->c;
}
/* The userland version */
#define PACC_LINE (_pacc_coords(_pacc, _x).line)
#define PACC_COL (_pacc_coords(_pacc, _x).col)

static void pacc_pr(char **buf, size_t *l, char *fmt, ...) {
    size_t n;
    va_list argp;

    va_start(argp, fmt);
    n = vsnprintf(0, 0, fmt, argp);
    va_end(argp);

    *buf = realloc(*buf, *l + n + 1);
    if (!buf) pacc_nomem();

    va_start(argp, fmt);
    vsnprintf(*buf + *l, n + 1, fmt, argp);
    va_end(argp);
    *l += n;
}

char *PACC_SYM(pos)(struct pacc_parser *p, const char *s) {
    struct _pacc_coord c;
    size_t l = 0;
    char *r = 0;

    c = _pacc_coords(p, p->err_col);
    pacc_pr(&r, &l, "%d:%d: %s", c.line, c.col, s);

    return r;
}

char *PACC_SYM(error)(struct pacc_parser *p) {
    size_t i, l = 0;
    char *r = 0;

    /* XXX this, or something like it, is very handy, and needs to go
     * in. But we're holding off for now as all failing test cases would
     * need to be changed. */
    //printf("got `%c', ", p->string[p->err_col]); /* XXX UTF-8? */

    if (p->err_valid && p->err[0][0] == '.')
        pacc_pr(&r, &l, "expected ");
    for (i = 0; i < p->err_valid; ++i) {
        char *s = p->err[i];
        if (*s == '.') ++s;
        for ( ; *s; ++s) {
            if (isprint(*s)) pacc_pr(&r, &l, "%c", *s);
            else switch (*s) {
                case '\n':
                    pacc_pr(&r, &l, "\\n");
                    break;
                default:
                    pacc_pr(&r, &l, "\\x%02x", *s);
                    break;
            }
        }

        if (i + 1 < p->err_valid) {
            pacc_pr(&r, &l, ", ");
            if (i + 2 == p->err_valid) pacc_pr(&r, &l, "or ");
        }
    }
    return PACC_SYM(pos)(p, r);
    //printf("%s\n", r);
    //printf("(column %ld)\n", _pacc->err_col);
}

int PACC_SYM(parse)(struct pacc_parser *_pacc) {
    enum pacc_status _status;
    int _cont = -1, _st, _pacc_any_i, _evaling = 0;
    size_t _x = 0, _x_rule, _pos, _pacc_ev_i;
    uint32_t _pacc_utf_cp;

#if 0
    fprintf(stderr, "sizeof mid %ld, sizeof blk %ld, mids/blk %ld\n",
            sizeof(struct pacc_mid), sizeof(struct pacc_blk),
            pacc_MIDS_PER_BLK);
    fprintf(stderr, "bucket count is %d\n", _pacc->m_bkt_cnt);
#endif

#EMIT_ENGINE

    _x = 0;
    cur = _pacc_result(_pacc, _x, start_rule_id);
    /* pacc grammars are anchored on the right */
    if ((cur->rule & 3) == parsed && cur->remainder != _pacc->input_length) {
        _pacc_error(_pacc, ".end-of-input", cur->remainder);
        cur->rule = (cur->rule & ~3) | no_parse;
    }
    if (!_evaling && (cur->rule & 3) == parsed) {
        unsigned char *stack_save;

        PACC_TRACE fprintf(stderr, "PARSED! Time to start eval...\n");
        _evaling = 1;
        _pacc_ev_i = 0;
    eval_loop:
        PACC_TRACE fprintf(stderr, "eval loop with _pacc_ev_i == %zu\n",
                _pacc_ev_i);
        stack_save = _pacc->sp;
    eval1:
        while (_pacc_ev_i < cur->ev_valid) {
            int col, rule;
            rule = cur->evlis[_pacc_ev_i].call_id;
            col = cur->evlis[_pacc_ev_i].col;
            ++_pacc_ev_i;
            PACC_TRACE fprintf(stderr, "eval loop: r%d @ c%d\n", rule, col);
            _pacc_Push(cur);
            _pacc_Push(_pacc_ev_i);
            cur = _pacc_result(_pacc, col, rule);
            _pacc_ev_i = 0;
        }
        if ((cur->rule & 3) != evaluated && cur->expr_id) {
            _x = cur->col;
            _st = cur->expr_id;
            goto top;
        }
    _pacc_expr_done:
        cur->rule = (cur->rule & ~3) | evaluated;
        if (_pacc->sp != stack_save) {
            _pacc_Pop(_pacc_ev_i);
            _pacc_Pop(cur);
            goto eval1;
        }
        PACC_TRACE fprintf(stderr, "eval finished\n");
        goto contin;
    }

    PACC_TRACE {
        unsigned int min, max;
        unsigned int i;
        unsigned long a, v;

        a = v = 0;
        min = -1; max = 0;
        for (i = 0; i < _pacc->m_bkt_cnt; ++i) {
            unsigned int valid = _pacc->m_bkt[i].valid;
            unsigned int alloc;

            alloc = _pacc_MIDS_PER_BLK * ((valid % _pacc_MIDS_PER_BLK != 0) + valid / _pacc_MIDS_PER_BLK);
            fprintf(stderr, "%d/%d ", valid, alloc);
            v += valid; a += alloc;
            if (valid < min)
                min = valid;
            if (valid > max)
                max = valid;
        }
        fprintf(stderr, "\n");
        fprintf(stderr, "used %u buckets\n", _pacc->m_bkt_cnt);
        fprintf(stderr, "chain length from %d to %d\n", min, max);
        fprintf(stderr, "total used/allocated: %ld/%ld\n", v, a);
        fprintf(stderr, "coord_valid: %zu\n", _pacc->coord_valid);
        fprintf(stderr, "coord_alloc: %zu\n", _pacc->coord_alloc);
    }

    if ((cur->rule & 3) == evaluated) {
        PACC_TRACE fprintf(stderr, "parsed with value " TYPE_PRINTF "\n", cur->value.u0); /* XXX u0 */
        _pacc->value = cur->value.u0;
    } else if ((cur->rule & 3) == parsed) {
        PACC_TRACE fprintf(stderr, "parsed with void value\n");
    } else PACC_TRACE fprintf(stderr, "not parsed\n");
    return (cur->rule & 3) == evaluated;
}

PACC_TYPE PACC_SYM(result)(struct pacc_parser *p) {
    return p->value;
}

int PACC_SYM(wrap)(const char *name, char *addr, size_t l, PACC_TYPE *result) {
    int parsed;
    struct pacc_parser *p;

    p = PACC_SYM(new)();

    PACC_SYM(input)(p, addr, l);
    parsed = PACC_SYM(parse)(p);
    if (parsed) *result = PACC_SYM(result)(p);
    else {
        char *e = PACC_SYM(error)(p);
        fprintf(stderr, "%s:%s\n", name, e);
        free(e);
    }

    PACC_SYM(destroy)(p);

    return parsed;
}
